/*

This file (part 1) contains the first steps of obtaining the actual results.
In order, the code does the following:

* Define a number of subprograms that are called repeatedly, to simplify the process
* Get and prepare the data (gathered in a separate file - see gen_data.do).
* Run all analyses and store the results.
* Save necessary matrices and final dataset to be used for meta-analyses in step 2.

Part 2 contains the following steps:
* Aggregate and meta-analyze results from all individual analyses.
* Generate output files (tables and figures) based on the above.

The raw data can only be accessed after ethical clearance by a Swedish ethics
review board and a subsequent data order from the various sources used. The
procedure is described in the README file. However, once a sufficient level of
aggregation is reached, the data is freely available. We have constructed the
following code so that anyone who wishes to replicate the results can follow
from part 2 and onwards using the supplied final data files.

*/

clear all




************************************
* PROGRAMS THAT NEED TO BE DEFINED *
************************************

/*
The program get_estimates does three things:
First, it standardizes the predictor and the outcome. This is necessary (as
opposed to simply using a "beta" flag), since the "absorb" flag precludes this
with the reghdfe command.
Second, it runs the model specified by the arguments (see below).
Third, it stores all necessary estimates in the set of matrices corresponding
to the predictor.

Takes the following arguments:
1. Outcome variable (string)
2. Predictor variable (string)
3. List of controls (string)
4. Matrix model slot to use (int)
5. Post-command (used for absorbing or not) (string)
6. Number of outcome index (int)
*/
capture program drop get_estimates
program define get_estimates

	di "Running model for `1', `2'"

	* Standardize
	qui:su `1' if include_indicator==1 & `2'!=.
	qui:gen temp_y = (`1'-r(mean))/r(sd)
	qui:replace temp_y=. if include_indicator==. | `2'==.
	qui:su `2' if include_indicator==1 & `1'!=.
	qui:gen temp_x = (`2'-r(mean))/r(sd)
	qui:replace temp_x=. if include_indicator==. | `1'==.
	
	* Run regression and store
	qui:reghdfe temp_y temp_x `3' if include_indicator==1, cluster(LopNrParID) `5'
	matrix mat_`2'[`6',`4'] = _b[temp_x]
	matrix mat_`2'_beta[`6',`4'] = _b[temp_x]
	matrix mat_`2'_se[`6',`4']=_se[temp_x]
	matrix mat_`2'_var[`6',`4']=_se[temp_x]^2
	matrix mat_`2'_t[`6',`4'] = _b[temp_x]/_se[temp_x]
	matrix mat_`2'_p[`6',`4'] = 2*ttail(e(df_r),abs(_b[temp_x]/_se[temp_x]))
	matrix mat_`2'_n[`6',`4'] = e(N)
	
	drop temp_x temp_y
	
end


/*
The program copymodel takes estimates from all matrix slots corresponding
to a specified model (predictor/outcome and model spec) and copies to another
model slot index. This is useful for renormalizing by outcome scaling by simply
flipping the sign of the coefficient (as opposed to rerunning all models with
the reversed outcome scale).

Takes the following arguments
1. Predictor variable 8str)
2. Outcome number (int)
3. Model slot from (int)
4. Model slot to (int)
*/
capture program drop copymodel
program define copymodel

	local indep = "`1'"
	local outcome = `2'
	local modelfrom = `3'
	local modelto = `4'
	
	matrix mat_`indep'[`outcome',`modelto'] = mat_`indep'[`outcome',`modelfrom']
	matrix mat_`indep'_beta[`outcome',`modelto'] = mat_`indep'_beta[`outcome',`modelfrom']
	matrix mat_`indep'_se[`outcome',`modelto'] = mat_`indep'_se[`outcome',`modelfrom']
	matrix mat_`indep'_var[`outcome',`modelto'] = mat_`indep'_var[`outcome',`modelfrom']
	matrix mat_`indep'_t[`outcome',`modelto'] = mat_`indep'_t[`outcome',`modelfrom']
	matrix mat_`indep'_p[`outcome',`modelto'] = mat_`indep'_p[`outcome',`modelfrom']
	matrix mat_`indep'_n[`outcome',`modelto'] = mat_`indep'_n[`outcome',`modelfrom']

end






********************
* DATA PREPARATION *
********************

* Some necessary inits
clear
set matsize 3000

* Load dataset
cd "C:\Userdata\Shared\Dofiles\PrelDoFiles\PrelRafael\Quantifying bias\replication_code"
use "C:\Userdata\Shared\Dofiles\PrelDoFiles\PrelRafael\Quantifying bias\dataset.dta", clear

* Standardize IQ variable by birth year (avoids Flynn effect artefacts)
egen mean_IQ = mean(IQ) if Sex==0, by(BirthYear)
egen sd_IQ = sd(IQ) if Sex==0, by(BirthYear)
gen stdIQ = (IQ-mean_IQ)/sd_IQ
drop mean_IQ sd_IQ

* College variable (binary split of education based on 12 year basic+high school)
gen bin_edu=.
replace bin_edu=0 if education_years<=12
replace bin_edu=1 if education_years>12

* Gross wealth
gen wealth_untrimmed=(assets2003+assets2004+assets2005+assets2006+assets2007)/5000000
gen wealth_trimmed=wealth_untrimmed
winsor2 wealth_trimmed, cuts(0 99) trim replace
gen wealth_IHS = log(wealth_untrimmed+sqrt(1+wealth_untrimmed^2))

* Net wealth
gen wealth_net_untrimmed=(net_assets2003+net_assets2004+net_assets2005+net_assets2006+net_assets2007)/5000000
gen wealth_net_trimmed=wealth_net_untrimmed
winsor2 wealth_net_trimmed, cuts(0 99) trim replace
gen wealth_net_IHS = log(wealth_net_untrimmed+sqrt(1+wealth_net_untrimmed^2))


* Reduced dimensions used in robustness checks
gen dimension1 = decrease_public_sector + lower_taxes + sell_public_enterprise + more_private_healthcare + more_freeschools + more_freedom_companies
gen dimension2 = more_support_countryside + six_hour_workday
gen dimension3 = harder_punishment - more_skilled_immigration + language_test_citizenship + decrease_aid + fewer_refugees - more_support_immigrant_culture
gen dimension4 = decrease_pollution + less_carbondioxide
gen dimension5 = leave_eu - instate_euro - join_nato
forvalues i=1(1)5 {
	label variable dimension`i' "Dimension `i'"
}


* Imputation of missing parental birth years
gen f_by_imp=0
gen m_by_imp=0
replace f_by_imp=1 if father_BirthYear==.
replace m_by_imp=1 if mother_BirthYear==.
bysort BirthYear: egen mfby=mean(father_BirthYear)
bysort BirthYear: egen mmby=mean(mother_BirthYear)
replace father_BirthYear = round(mfby) if father_BirthYear==.
replace mother_BirthYear = round(mmby) if mother_BirthYear==.
drop mfby mmby

* Imputation of missing parental education
gen par_edu_imp=0
replace par_edu_imp=1 if parents_education==.
reg parents_education i.BirthYear i.modern_BirthMunicipality i.occupation if BirthYear>1942&BirthYear<1959
predict x
replace parents_education=x if parents_education==.
drop x

* Imputation of missing parental income
gen par_inc_imp=0
replace par_inc_imp=1 if parents_income==.
reg parents_income i.BirthYear i.modern_BirthMunicipality i.occupation if BirthYear>1942&BirthYear<1959
predict x
replace parents_income=x if parents_income==.
drop x

* Indicator variable for having all predictors (necessary to keep sample constant between models)
gen include_indicator = .
replace include_indicator = 1 if occupation!=. & modern_BirthMunicipality!=. & father_BirthYear!=. & mother_BirthYear!=. & Sex!=. & age!=. & income10_tr!=. & education_years!=. & parents_education!=. & parents_income!=.

* Label independent variables
label variable wealth_IHS "Gross wealth"
label variable wealth_net_IHS "Net wealth"
label variable income10_tr "Work income"
label variable education_years "Education years"
label variable bin_edu "College"
label variable extraversion_SALTY "Extraversion"
label variable LOC_SALTY "Locus of control"
label variable risk_preference "Risk preference"
label variable trust "Trust"
label variable antisocial "Antisocial attitudes"
label variable altruism "Altruism"
label variable utilitarian "Utilitarian"

* Label dependent variables
label variable decrease_public_sector "Decrease public sector"
label variable decrease_defense_spending "Decrease defense spending"
label variable decrease_welfare "Decrease social welfare"
label variable lower_taxes "Cut taxes"
label variable keep_property_taxes "Keep estate tax"
label variable sell_public_enterprise "Sell state companies"
label variable decrease_economic_inequality "Decrease economic inequality"
label variable more_private_healthcare "More private healthcare"
label variable decrease_finmarket_impact "Decrease financial market influence"
label variable keep_maxtaxa "Keep max child care fees"
label variable more_freeschools "More support for free schools"
label variable earlier_grades "Earlier grades in school"
label variable more_support_countryside "More rural support"
label variable six_hour_workday "Legislate six-hour workday"
label variable ban_pornography "Ban pornography"
label variable limit_abortion "Limit free abortion"
label variable harder_punishment "Harder prison sentences"
label variable better_animal_protection "Strengthen animal rights"
label variable no_nuclear_power "Abolish nuclear power"
label variable no_cars_in_cities "Ban cars in inner cities"
label variable decrease_pollution "Prevent environmental degradation"
label variable less_carbondioxide "Decrease carbon emissions"
label variable more_skilled_immigration "Increase labor immigration"
label variable language_test_citizenship "Language test f. citizenship"
label variable decrease_aid "Decrease foreign aid"
label variable fewer_refugees "Take fewer refugees"
label variable more_support_immigrant_culture "Support immigrant culture"
label variable abolish_debt "Abolish third world debt"
label variable more_freedom_companies "More freedom f. companies"
label variable leave_eu "Leave EU"
label variable instate_euro "Adobt the euro"
label variable join_nato "Join NATO"
label variable more_free_trade "Support free trade"
label variable support_war_on_terror "Support war on terror"


* Generate numbered and pairwise complete dependent variables
local i=1
foreach depvar in decrease_public_sector decrease_defense_spending decrease_welfare lower_taxes keep_property_taxes ///
	sell_public_enterprise decrease_economic_inequality more_private_healthcare decrease_finmarket_impact keep_maxtaxa ///
	more_freeschools earlier_grades more_support_countryside six_hour_workday ban_pornography limit_abortion harder_punishment ///
	better_animal_protection no_nuclear_power no_cars_in_cities decrease_pollution less_carbondioxide more_skilled_immigration ///
	language_test_citizenship decrease_aid fewer_refugees more_support_immigrant_culture abolish_debt more_freedom_companies ///
	leave_eu instate_euro join_nato more_free_trade support_war_on_terror {
	
		rename `depvar' outcome`i'
		label variable outcome`i' "`depvar'"
		order outcome`i'		
		bysort LopNrParID: replace outcome`i' = . if outcome`i'[1]==. | outcome`i'[2]==.
		
		local i = `i'+1
}
order outcome1 outcome2 outcome3 outcome4 outcome5 outcome6 outcome7 outcome8 outcome9 outcome10 ///
	outcome11 outcome12 outcome13 outcome14 outcome15 outcome16 outcome17 outcome18 outcome19 outcome20 ///
	outcome21 outcome22 outcome23 outcome24 outcome25 outcome26 outcome27 outcome28 outcome29 outcome30 ///
	outcome31 outcome32 outcome33 outcome34
	
* Replace with pairwise complete independent variables
foreach v in bin_edu extraversion_SALTY LOC_SALTY risk_preference education_years ///
	wealth_IHS wealth_net_IHS income10_tr trust antisocial altruism utilitarian stdIQ ///
	include_indicator {
	bysort LopNrParID: replace `v' = . if `v'[1]==. | `v'[2]==.
}

	
* Descriptive tables (for Online Appendix)
outreg2 using desc_dependent.tex, replace sum(log) tex(frag) ///
	keep(outcome1-outcome34) label

outreg2 using desc_independent.tex, replace sum(log) tex(frag) ///
	keep(bin_edu extraversion_SALTY LOC_SALTY risk_preference education_years ///
	wealth_IHS wealth_net_IHS income10_tr trust antisocial altruism utilitarian stdIQ) label



/*

Create matrices to contain model estimates and other information. Each matrix
contains two indices: the first, [i;], to denote each outcome, and the second,
[;j] to denote each model. The model slot indices are laid out as described below.
VU denotes "Valundersökning", i.e. the Swedish election study. BES denotes the
British Election Study. DES denotes the Danish Election Study. NES denotes
the Norwegian Election Study.

1. Main empty model
2. Main naive model
3. Main within model

4. VU-comparison empty model
5. VU-comparison naive model
6. VU-cmparison within model

7. Winners curse empty model based on empty
8. Winners curse naive model based on empty
9. Winners curse within model based on empty

10. Winners curse naive model based on naive
11. Winners curse within model based on naive

12. Beta threshold empty model based on empty
13. Beta threshold naive model based on empty
14. Beta threshold within model based on empty

15. Beta threshold naive model based on naive
16. Beta threshold within model based on naive

17. BES-comparison empty model
18. BES-comparison naive model
19. BES-comparison within model

20. DES-comparison empty model
21. DES-comparison naive model
22. DES-comparison within model

23. NES-comparison empty model
24. NES-comparison naive model
25. NES-comparison within model

26. Contact rate, within
27. Contact rate, interaction

28. Main model, naive, renormed (re 2)
29. Main model, within, renormed on naive (re 3)

30. VU-model, naive, renormed (re 5)
31. VU-model, within, renormed on naive (re 6)

32. Winners curse model, naive model based on naive, renormed (re 10)
33. Winners curse model, naive model based on naive, renormed on naive (re 11)

34. Beta selection model, naive model based on naive, renormed (re 15)
35. Beta selectionmodel, naive model based on naive, renormed on naive (re 16)

36. BES-model, naive, renormed (re 18)
37. BES-model, naive, renormed on naive (re 19)

38. DES-model, naive, renormed (re 21)
39. DES-model, naive, renormed on naive (re 22)

40. NES-model, naive, renormed (re 24)
41. NES-model, naive, renormed on naive (re 25)

42. Empty robust, standard
43. Empty robust, contact interaction
*/
foreach indepvar in wealth_IHS wealth_net_IHS income10_tr education_years bin_edu risk_preference trust extraversion_SALTY LOC_SALTY antisocial altruism utilitarian stdIQ {
	matrix mat_`indepvar'=J(34,50,.)
	matrix mat_`indepvar'_p=J(34,50,.)
	matrix mat_`indepvar'_beta=J(34,50,.)
	matrix mat_`indepvar'_se=J(34,50,.)
	matrix mat_`indepvar'_var=J(34,50,.)
	matrix mat_`indepvar'_t=J(34,50,.)
	matrix mat_`indepvar'_n=J(34,50,.)
	matrix mat_`indepvar'_wc=J(1,4,0)
}

/*
Create correlation matrices for outcome space.
This is necessary for calculating the meta-
analytical standard errors (i.e. eq. 7).
*/
pwcorr outcome1-outcome34 if zyg==1&include_indicator==1
matrix corr_mat=r(C)
matrix n_corr_mat=r(C)






********************
* ALL STR ANALYSES *
********************

local allindepvars = "bin_edu extraversion_SALTY LOC_SALTY risk_preference education_years wealth_IHS wealth_net_IHS income10_tr trust antisocial altruism utilitarian stdIQ"

* Run all outcomes
forvalues i=1(1)34 {

	* Run all predictors
	foreach indepvar in `allindepvars' {
	
		* Define model (control variables)
		local model = "c.Sex##i.age i.modern_BirthMunicipality i.father_BirthYear i.mother_BirthYear i.occupation parents_income parents_education income10_tr par_edu_imp par_inc_imp f_by_imp m_by_imp"
		if "`indepvar'"!="bin_edu" local model = "`model' education_years"
		
		* If you wish to obtain the alternative results in Table 6 in Appendix B, simply uncomment the immediately following line
		*local model = "c.Sex##i.age i.modern_BirthMunicipality i.father_BirthYear i.mother_BirthYear i.occupation parents_income parents_education par_edu_imp par_inc_imp f_by_imp m_by_imp"
		
		* Run and store results for all three models for this outcome-predictor combination
		get_estimates "outcome`i'" `indepvar' "c.Sex##i.age" 1 "noabsorb" `i'
		get_estimates "outcome`i'" `indepvar' "`model'" 2 "noabsorb" `i'
		get_estimates "outcome`i'" `indepvar' "`model'" 3 "absorb(LopNrParID)" `i'
		
		* Copy to new slots for later naive renormalization
		copymodel `indepvar' `i' 2 28
		copymodel `indepvar' `i' 3 29
		
		* Winners curse model selection
		if mat_`indepvar'_p[`i',1]<0.05 {
			matrix mat_`indepvar'_wc[1,1] = mat_`indepvar'_wc[1,1] + 1
			matrix mat_`indepvar'_beta[`i',7] = mat_`indepvar'_beta[`i',1]
			matrix mat_`indepvar'_beta[`i',8] = mat_`indepvar'_beta[`i',2]
			matrix mat_`indepvar'_beta[`i',9] = mat_`indepvar'_beta[`i',3]
			matrix mat_`indepvar'_se[`i',7] = mat_`indepvar'_se[`i',1]
			matrix mat_`indepvar'_se[`i',8] = mat_`indepvar'_se[`i',2]
			matrix mat_`indepvar'_se[`i',9] = mat_`indepvar'_se[`i',3]
			matrix mat_`indepvar'_var[`i',7] = mat_`indepvar'_var[`i',1]
			matrix mat_`indepvar'_var[`i',8] = mat_`indepvar'_var[`i',2]
			matrix mat_`indepvar'_var[`i',9] = mat_`indepvar'_var[`i',3]
		}
		if mat_`indepvar'_p[`i',2]<0.05 {
			matrix mat_`indepvar'_wc[1,2] = mat_`indepvar'_wc[1,2] + 1
			matrix mat_`indepvar'_beta[`i',10] = mat_`indepvar'_beta[`i',2]
			matrix mat_`indepvar'_se[`i',10] = mat_`indepvar'_se[`i',2]
			matrix mat_`indepvar'_var[`i',10] = mat_`indepvar'_var[`i',2]
			matrix mat_`indepvar'_beta[`i',11] = mat_`indepvar'_beta[`i',3]
			matrix mat_`indepvar'_se[`i',11] = mat_`indepvar'_se[`i',3]
			matrix mat_`indepvar'_var[`i',11] = mat_`indepvar'_var[`i',3]
			
			* Copy to new slots for naive renormed models
			copymodel `indepvar' `i' 2 32
			copymodel `indepvar' `i' 3 33
		}
		
		* Beta selection model selection
		if abs(mat_`indepvar'_beta[`i',1])>=0.1 {
			matrix mat_`indepvar'_wc[1,3] = mat_`indepvar'_wc[1,3] + 1
			matrix mat_`indepvar'_beta[`i',12] = mat_`indepvar'_beta[`i',1]
			matrix mat_`indepvar'_beta[`i',13] = mat_`indepvar'_beta[`i',2]
			matrix mat_`indepvar'_beta[`i',14] = mat_`indepvar'_beta[`i',3]
			matrix mat_`indepvar'_se[`i',12] = mat_`indepvar'_se[`i',1]
			matrix mat_`indepvar'_se[`i',13] = mat_`indepvar'_se[`i',2]
			matrix mat_`indepvar'_se[`i',14] = mat_`indepvar'_se[`i',3]
			matrix mat_`indepvar'_var[`i',12] = mat_`indepvar'_var[`i',1]
			matrix mat_`indepvar'_var[`i',13] = mat_`indepvar'_var[`i',2]
			matrix mat_`indepvar'_var[`i',14] = mat_`indepvar'_var[`i',3]
		}
		if abs(mat_`indepvar'_beta[`i',2])>=0.1 {
			matrix mat_`indepvar'_wc[1,4] = mat_`indepvar'_wc[1,4] +1
			matrix mat_`indepvar'_beta[`i',15] = mat_`indepvar'_beta[`i',2]
			matrix mat_`indepvar'_beta[`i',16] = mat_`indepvar'_beta[`i',3]
			matrix mat_`indepvar'_se[`i',15] = mat_`indepvar'_se[`i',2]
			matrix mat_`indepvar'_se[`i',16] = mat_`indepvar'_se[`i',3]
			matrix mat_`indepvar'_var[`i',15] = mat_`indepvar'_var[`i',2]
			matrix mat_`indepvar'_var[`i',16] = mat_`indepvar'_var[`i',3]
			
			* Copy to new slots for naive renormed models
			copymodel `indepvar' `i' 2 34
			copymodel `indepvar' `i' 3 35
		}
	}
}

* Robustness checks with reduced outcome space dimensions (Tables 7-11 in Appendix B)
forvalues i=1(1)5 {
	foreach indepvar in `allindepvars' {
		local model = "c.Sex##i.age i.modern_BirthMunicipality i.father_BirthYear i.mother_BirthYear i.occupation parents_income parents_education income10_tr par_edu_imp par_inc_imp f_by_imp m_by_imp"
		if "`indepvar'"!="bin_edu" local model = "`model' education_years"
		get_estimates "dimension`i'" `indepvar' "c.Sex##i.age" 44 "noabsorb" `i'
		get_estimates "dimension`i'" `indepvar' "`model'" 45 "noabsorb" `i'
		get_estimates "dimension`i'" `indepvar' "`model'" 46 "absorb(LopNrParID)" `i'
	}
}




**********************************
* Comparisons with external data *
**********************************

* VU
/*
This section runs the comparison models for the VU (i.e. Swedish election study) data.
The results for the models using the actual VU data is found in a separate file (VU.do).
*/

* Create new outcome space using only the outcomes that match (described in the Online Appendix)
local vl = "outcome1 outcome2 outcome4 outcome6 outcome7 outcome8 outcome10 outcome13 outcome14 outcome15 outcome17 outcome19 outcome23 outcome25 outcome26 outcome27 outcome30 outcome31 outcome32 outcome33 outcome34"
local j=1
foreach i of varlist outcome1 outcome2 outcome4 outcome6 outcome7 outcome8 outcome10 outcome13 outcome14 outcome15 outcome17 outcome19 outcome23 outcome25 outcome26 outcome27 outcome30 outcome31 outcome32 outcome33 outcome34 {
	gen VU_outcome`j' = `i'
	local j=`j'+1
}

* Create new indicator for having all required independent variables
replace include_indicator=.
replace include_indicator=1 if occupation!=. & modern_BirthMunicipality!=. & Sex!=. & age!=. & income10_tr!=. & education_years!=.

* Correlation matrix of the new outcome space
pwcorr VU_outcome1-VU_outcome21 if include_indicator==1
matrix VU_corr_mat=r(C)
matrix VU_n_corr_mat=VU_corr_mat
local i=0
foreach depvar of varlist VU_outcome1-VU_outcome21 {
	local i = `i' + 1
	foreach indepvar in education_years bin_edu income10_tr {
		di `i' ", `depvar', `indepvar'"
		if "`indepvar'" == "bin_edu"|"`indepvar'" == "education_years" {
			get_estimates `depvar' `indepvar' "c.Sex##i.age" 4 "noabsorb" `i'
			get_estimates `depvar' `indepvar' "c.Sex##i.age i.occupation i.modern_BirthMunicipality income10_tr" 5 "noabsorb" `i'
			get_estimates `depvar' `indepvar' "c.Sex##i.age i.occupation i.modern_BirthMunicipality income10_tr" 6 "absorb(LopNrParID)" `i'
		}
		else {
			get_estimates `depvar' `indepvar' "c.Sex##i.age" 4 "noabsorb" `i'
			get_estimates `depvar' `indepvar' "c.Sex##i.age i.occupation i.modern_BirthMunicipality income10_tr education_years" 5 "noabsorb" `i'
			get_estimates `depvar' `indepvar' "c.Sex##i.age i.occupation i.modern_BirthMunicipality income10_tr education_years" 6 "absorb(LopNrParID)" `i'
		}
		copymodel `indepvar' `i' 5 30
		copymodel `indepvar' `i' 6 31
	}
}


* BES
/*
This section runs the comparison models for the British election study data.
The results for the models using the actual BES data is found in a separate file (BES.do).
*/

* Create new outcome space using only the outcomes that match (described in the Online Appendix)
local v1 = "outcome1 outcome3 outcome4 outcome6 outcome7 outcome21 outcome26 outcome30"
local j=1
foreach i of varlist outcome1 outcome3 outcome4 outcome6 outcome7 outcome21 outcome26 outcome30 {
	gen BES_outcome`j' = `i'
	local j=`j'+1
}

* Create new indicator for having all required independent variables
replace include_indicator=.
replace include_indicator=1 if occupation!=. & modern_BirthMunicipality!=. & Sex!=. & age!=. & income10_tr!=. & education_years!=.

* Correlation matrix of the new outcome space
pwcorr BES_outcome1-BES_outcome8 if include_indicator==1
matrix BES_corr_mat=r(C)
matrix BES_n_corr_mat=BES_corr_mat
local i=0
foreach depvar of varlist BES_outcome1-BES_outcome8 {
	local i = `i' + 1
	foreach indepvar in education_years bin_edu income10_tr {
		di `i' ", `depvar', `indepvar'"
		if "`indepvar'" == "bin_edu"|"`indepvar'" == "education_years" {
			get_estimates `depvar' `indepvar' "c.Sex##i.age" 17 "noabsorb" `i'
			get_estimates `depvar' `indepvar' "c.Sex##i.age i.occupation i.modern_BirthMunicipality income10_tr" 18 "noabsorb" `i'
			get_estimates `depvar' `indepvar' "c.Sex##i.age i.occupation i.modern_BirthMunicipality income10_tr" 19 "absorb(LopNrParID)" `i'
		}
		else {
			get_estimates `depvar' `indepvar' "c.Sex##i.age" 17 "noabsorb" `i'
			get_estimates `depvar' `indepvar' "c.Sex##i.age i.occupation i.modern_BirthMunicipality income10_tr education_years" 18 "noabsorb" `i'
			get_estimates `depvar' `indepvar' "c.Sex##i.age i.occupation i.modern_BirthMunicipality income10_tr education_years" 19 "absorb(LopNrParID)" `i'
		}
		copymodel `indepvar' `i' 18 36
		copymodel `indepvar' `i' 19 37
	}
}


* DES
/*
This section runs the comparison models for the Danish election study data.
The results for the models using the actual DES data is found in a separate file (DES.do).
*/

* Create new outcome space using only the outcomes that match (described in the Online Appendix)
local v1 = "outcome1 outcome2 outcome7 outcome8 outcome26 outcome4 outcome21 outcome29 outcome25 outcome17 outcome34 outcome23 outcome30"
local j=1
foreach i of varlist outcome1 outcome2 outcome7 outcome8 outcome26 outcome4 outcome21 outcome29 outcome25 outcome17 outcome34 outcome23 outcome30 {
	gen DES_outcome`j' = `i'
	local j=`j'+1
}

* Create new indicator for having all required independent variables
replace include_indicator=.
replace include_indicator=1 if occupation!=. & modern_BirthMunicipality!=. & Sex!=. & age!=. & income10_tr!=. & education_years!=.

* Correlation matrix of the new outcome space
pwcorr DES_outcome1-DES_outcome13 if include_indicator==1
matrix DES_corr_mat=r(C)
matrix DES_n_corr_mat=DES_corr_mat
local i=0
foreach depvar of varlist DES_outcome1-DES_outcome13 {
	local i = `i' + 1
	foreach indepvar in education_years bin_edu income10_tr trust {
		di `i' ", `depvar', `indepvar'"
		if "`indepvar'" == "bin_edu"|"`indepvar'" == "education_years" {
			get_estimates `depvar' `indepvar' "c.Sex##i.age" 20 "noabsorb" `i'
			get_estimates `depvar' `indepvar' "c.Sex##i.age i.occupation i.modern_BirthMunicipality income10_tr" 21 "noabsorb" `i'
			get_estimates `depvar' `indepvar' "c.Sex##i.age i.occupation i.modern_BirthMunicipality income10_tr" 22 "absorb(LopNrParID)" `i'
		}
		if "`indepvar'" == "income10_tr" {
			get_estimates `depvar' `indepvar' "c.Sex##i.age" 20 "noabsorb" `i'
			get_estimates `depvar' `indepvar' "c.Sex##i.age i.occupation i.modern_BirthMunicipality education_years" 21 "noabsorb" `i'
			get_estimates `depvar' `indepvar' "c.Sex##i.age i.occupation i.modern_BirthMunicipality education_years" 22 "absorb(LopNrParID)" `i'
		}
		else {
			get_estimates `depvar' `indepvar' "c.Sex##i.age" 20 "noabsorb" `i'
			get_estimates `depvar' `indepvar' "c.Sex##i.age i.occupation i.modern_BirthMunicipality income10_tr education_years" 21 "noabsorb" `i'
			get_estimates `depvar' `indepvar' "c.Sex##i.age i.occupation i.modern_BirthMunicipality income10_tr education_years" 22 "absorb(LopNrParID)" `i'
		}
		copymodel `indepvar' `i' 21 38
		copymodel `indepvar' `i' 22 39
	}
}

* NES
/*
This section runs the comparison models for the Norwegian election study data.
The results for the models using the actual NES data is found in a separate file (NES.do).
*/

* Create new outcome space using only the outcomes that match (described in the Online Appendix)
local v1 = "outcome25 outcome21 outcome4 outcome9 outcome26 outcome30 outcome22 outcome7 outcome2 outcome16 outcome11 outcome29 outcome6 outcome3"
local j=1
foreach i of varlist outcome25 outcome21 outcome4 outcome9 outcome26 outcome30 outcome22 outcome7 outcome2 outcome16 outcome11 outcome29 outcome6 outcome3 {
	gen NES_outcome`j' = `i'
	local j=`j'+1
}

* Create new indicator for having all required independent variables
replace include_indicator=.
replace include_indicator=1 if occupation!=. & modern_BirthMunicipality!=. & Sex!=. & age!=. & income10_tr!=. & education_years!=.

* Correlation matrix of the new outcome space
pwcorr NES_outcome1-NES_outcome14 if include_indicator==1
matrix NES_corr_mat=r(C)
matrix NES_n_corr_mat=NES_corr_mat
local i=0
foreach depvar of varlist NES_outcome1-NES_outcome14 {
	local i = `i' + 1
	foreach indepvar in education_years bin_edu income10_tr {
		di `i' ", `depvar', `indepvar'"
		if "`indepvar'" == "bin_edu"|"`indepvar'" == "education_years" {
			get_estimates `depvar' `indepvar' "c.Sex##i.age" 23 "noabsorb" `i'
			get_estimates `depvar' `indepvar' "c.Sex##i.age i.occupation i.modern_BirthMunicipality income10_tr" 24 "noabsorb" `i'
			get_estimates `depvar' `indepvar' "c.Sex##i.age i.occupation i.modern_BirthMunicipality income10_tr" 25 "absorb(LopNrParID)" `i'
		}
		else {
			get_estimates `depvar' `indepvar' "c.Sex##i.age" 23 "noabsorb" `i'
			get_estimates `depvar' `indepvar' "c.Sex##i.age i.occupation i.modern_BirthMunicipality education_years" 24 "noabsorb" `i'
			get_estimates `depvar' `indepvar' "c.Sex##i.age i.occupation i.modern_BirthMunicipality education_years" 25 "absorb(LopNrParID)" `i'
		}
		copymodel `indepvar' `i' 24 40
		copymodel `indepvar' `i' 25 41
	}
}





**************************************
* ROBUSTNESS MODEL WITH CONTACT RATE *
**************************************

* Mean contact rate per pair
bysort LopNrParID: egen mc = mean(contact)

* Generate within-pair differences (model is run at the pair level)
local allindepvars = "bin_edu extraversion_SALTY LOC_SALTY risk_preference education_years wealth_IHS wealth_net_IHS income10_tr trust antisocial altruism utilitarian stdIQ"
foreach indepvar in `allindepvars' {
	bysort LopNrParID: egen m_`indepvar' = mean(`indepvar')
	bysort LopNrParID: gen d_`indepvar' = 2 * (`indepvar' - m_`indepvar')
	drop m_`indepvar'
}
bysort LopNrParID: gen d_occ=abs(occupation[1]-occupation[2])
replace d_occ=1 if d_occ
replace include_indicator=.
replace include_indicator=1 if d_income10_tr!=.&d_education_years!=.&d_occ!=.&mc!=.

* Correlation matrix of the outcome space
pwcorr outcome1-outcome34 if include_indicator==1
matrix ROBUST_corr_mat=r(C)
matrix ROBUST_e_corr_mat=ROBUST_corr_mat

matrix robust_empty=J(34,1,.)

* Run robustness models for all predictors and outcomes
local allindepvars = "bin_edu extraversion_SALTY LOC_SALTY risk_preference education_years wealth_IHS wealth_net_IHS income10_tr trust antisocial altruism utilitarian stdIQ"
forvalues i=1(1)34 {
	foreach indepvar in `allindepvars' {
		di "CONTACT ROBUSTNESS: `indepvar' and outcome `i'"
		
		if "`indepvar'" == "bin_edu" | "`indepvar'" == "education_years" {
			local model = "d_income10_tr d_occ"
		}
		else {
			if "`indepvar'" == "income10_tr" {
				local model = "d_occ d_education_years"
			}
			else {
				local model = "d_income10_tr d_occ d_education_years"
			}
		}
		
		* Standardize
		qui:su outcome`i' if include_indicator==1 & `indepvar'!=.
		qui:gen temp_y = (outcome`i'-r(mean))/r(sd)
		qui:replace temp_y=. if include_indicator==. | `indepvar'==.
		qui:su `indepvar' if include_indicator==1 & outcome`i'!=.
		qui:gen temp_x = (`indepvar'-r(mean))/r(sd)
		qui:replace temp_x=. if include_indicator==. | outcome`i'==.
		
		* Empty, standard
		qui: reg temp_y c.temp_x if include_indicator==1
		matrix mat_`indepvar'_beta[`i',42]=_b[temp_x]
		matrix mat_`indepvar'_se[`i',42]=_se[temp_x]
		matrix mat_`indepvar'_var[`i',42]=_se[temp_x]^2
		matrix mat_`indepvar'_n[`i',42]=e(N)
		
		* Empty, contact interaction
		qui: reg temp_y c.temp_x##c.mc if include_indicator==1
		matrix mat_`indepvar'_beta[`i',43]=_b[temp_x]
		matrix mat_`indepvar'_se[`i',43]=_se[temp_x]
		matrix mat_`indepvar'_var[`i',43]=_se[temp_x]^2
		matrix mat_`indepvar'_n[`i',43]=e(N)
		
		* Within-transform
		qui:bysort LopNrParID: egen m = mean(temp_y)
		qui:bysort LopNrParID: gen yy = 2 * (temp_y-m)
		qui:replace yy = . if TwinNr==2
		drop m
		qui:bysort LopNrParID: egen m = mean(temp_x)
		qui:bysort LopNrParID: gen xx = 2 * (temp_x-m)
		qui:replace xx = . if TwinNr==2
		drop m
		
		* Within, standard
		qui: reg yy xx `model' if include_indicator==1
		matrix mat_`indepvar'_beta[`i',26]=_b[xx]
		matrix mat_`indepvar'_se[`i',26]=_se[xx]
		matrix mat_`indepvar'_var[`i',26]=_se[xx]^2
		matrix mat_`indepvar'_n[`i',26]=e(N)
		
		* Within, contact interaction
		qui: reg yy c.xx##c.mc `model' if include_indicator==1
		matrix mat_`indepvar'_beta[`i',27]=_b[xx]
		matrix mat_`indepvar'_se[`i',27]=_se[xx]
		matrix mat_`indepvar'_var[`i',27]=_se[xx]^2
		matrix mat_`indepvar'_n[`i',27]=e(N)
		
		drop yy xx temp_y temp_x
	}
}







*******************************************
* TURN RESULT MATRICES INTO FINAL DATASET *
*******************************************

* Save the correlation matrices (these are necessary to calculate the standard errors in part 2)
drop _all
local matrices = "corr_mat n_corr_mat VU_n_corr_mat VU_corr_mat BES_n_corr_mat BES_corr_mat DES_n_corr_mat DES_corr_mat NES_n_corr_mat NES_corr_mat ROBUST_e_corr_mat ROBUST_corr_mat"
foreach m in `matrices' {
	svmat `m'
}
save "matrices.dta", replace

* Now, turn all result matrices into the final dataset
drop _all
local allindepvars = "bin_edu extraversion_SALTY LOC_SALTY risk_preference education_years wealth_IHS wealth_net_IHS income10_tr trust antisocial altruism utilitarian stdIQ"
foreach indepvar in `allindepvars' {

	qui:svmat mat_`indepvar'_p
	qui:svmat mat_`indepvar'_beta
	qui:svmat mat_`indepvar'_se
	qui:svmat mat_`indepvar'_var
	qui:svmat mat_`indepvar'_n
	qui:svmat mat_`indepvar'_wc
	set obs 50
}

/*
At this point, we save this final dataset. The following file is available on the
Dataverse. From this point on, the analysis follows in analysis_part2 and
it is possible to follow from there without having access to the raw individual
level data.
The alternative data file "finaldata_alt.dta" can be used if Table 6 in Appendix B
(with alternative controls) is to be produced instead of the main results.
*/
save "finaldata.dta", replace
*save "finaldata_alt.dta", replace


